************************************************************
* Title: rwanda_teacher_endline_jde.do
* Author: Todd Pugatch
* Last update: June 4 2024
*
* Description: data cleaning for Blimpo and Pugatch, "Entrepreneurship Education
*	and Teacher Training in Rwanda," Stage 2 Registered report, Journal of 
*	Development Economics
*
* Inputs: teacher_endline_clean_final_NOPIIs.dta
* Outputs: teacher_endline_clean_jde.dta
* Notes: cleans teacher endline survey
****************************************************************

* Record the clock time at launch; it is displayed again at the end of the file
local start=`"$S_TIME"'

* Reset the session: data, matrices, Mata, graphs, programs, and any open log
clear
clear matrix
clear mata
graph drop _all
program drop _all
cap log close
set more off

* Set directories 
* Every path below is built from $main. Define it either by uncommenting the
* next line or in a master do-file that runs before this one.
*global main "[SET MAIN DIRECTORY HERE]"
if `"$main"'=="" {
	/*without this guard the paths would silently resolve to "/01_data/..." */
	di as error "global main is not set: define the project root directory before running this do-file"
	exit 198
}
	global rawdata "$main/01_data/01_raw"
	global cleandata "$main/01_data/02_clean"
	global dofiles "$main/02_dofiles"
	global results "$main/03_results"
	global out "$main/04_output"


* define exchange rate to convert all variables in FRW into USD
* (the local is defined here but not referenced anywhere else in this do-file)
local xrate=868.0579 /*exchange rate on 1 July 2018: http://www.exchangerates.org.uk/USD-RWF-exchange-rate-history-full.html*/

/****************************************************************
					LOAD AND PREPARE DATA

variable naming and label conventions: 
	--names:	use [bl/el] suffix for baseline/endline
				omit if variable is an identifier that could be used to match
					across datasets, like treatment status or school code
	--labels: 	use B/E for baseline/endline 
				use [H/T/S]Q for head teacher/teacher/student questionnaire
	--example: enrollment, as reported by head teacher at baseline in item #123
		name: enroll_bl
		label: "BHQ123: enrollment"
		
*****************************************************************/

* Load the de-identified teacher endline survey
quietly use "$rawdata/teacher/teacher_endline_clean_final_NOPIIs.dta", clear

************************************************************************
*											   						   *
* 	       SECTION 1: Identifying information			   			   *
* 											   						   *
************************************************************************
/*Survey identifiers: make an "_el" copy of each one so it is marked as
  endline; the original variables are left in place.*/
local idvars "uniqueid interview_starttime enumerator_id_113 consent_e enumerator_name_113 submissiondate starttime endtime teacherid reenter_teacherid schoolid reenter_schoolid interview_endtime formdef_version key piiid" 
foreach idvar of local idvars {
	quietly generate `idvar'_el=`idvar'
	label variable `idvar'_el "`idvar', endline"
}

/*correct school label errors, according to "Manual Checking Report.docx" */
* school id
qui gen long school_code=schoolid /*long format necessary because GS Rebero has school code longer than 7 digits*/
qui gen schoolname_el=school_name_106
lab var schoolname_el "school name, as reported in endline (school_name_106)"
/*value label schoollab is not defined in this file; presumably it comes with the raw data -- TODO confirm*/
lab val schoolname_el schoollab

/*teacher ID quality check: 
	1. does teacherid match reenter_teacherid?*/
format teacherid %12.0g
list teacherid reenter_teacherid schoolid school_name_106 if teacherid!=reenter_teacherid

/*	2. do first digits of teacherid match school code?
	--teacherid=50331072, schoolid=503107
		--matches same combination of codes in teacher observation data
		--DECISION: leave as is	*/
/*x = leading characters of uniqueid, with length matching the school's code
  length (6 by default, 7 or 8 for the three schools with longer codes).
  The exclusion must hold for all three codes at once, hence !inlist():
  chaining != comparisons with | is true for every observation.*/
qui gen x=substr(uniqueid,1,6) if !inlist(school_code,3503001,5030403,40519134) /*exclude schools with IDs>6 digits*/
qui replace x=substr(uniqueid,1,7) if school_code==3503001|school_code==5030403
qui replace x=substr(uniqueid,1,8) if school_code==40519134
qui destring x, replace
/*report observations whose ID prefix disagrees with the school code*/
count if school_code!=x & school_code!=.
sort school_code teacherid
list school_code school_name_106 x teacherid reenter_teacherid if school_code!=x & school_code!=. 
drop x
	
/*Reconcile treatment status with the sampling spreadsheet: overwrite
  group_111 for the schools where the dataset disagrees.*/
quietly replace group_111=2 if school_code==301112 
foreach code of numlist 403110 403037 403050 305012 504114 {
	quietly replace group_111=1 if school_code==`code'
}

/*create district & province IDs to match Census/map codes*/
/*Each survey district code (district_101) maps to exactly one Census code.
  NOTE(review): the original mapped district_101==2 twice (to 57 and then to
  35), so the second assignment overwrote the first and survey code 1 was
  never mapped. Code 1 is the only value in 1-11 without a mapping, so 35 is
  assigned to it here -- confirm against the survey codebook.*/
qui gen district_id=.
qui replace district_id=35 if district_101==1 
qui replace district_id=57 if district_101==2 
qui replace district_id=53 if district_101==3
qui replace district_id=54 if district_101==4 
qui replace district_id=56 if district_101==5 
qui replace district_id=31 if district_101==6 
qui replace district_id=43 if district_101==7 
qui replace district_id=45 if district_101==8 
qui replace district_id=41 if district_101==9 
qui replace district_id=34 if district_101==10 
qui replace district_id=42 if district_101==11 
lab var district_id "District ID, Census code"

/*province_id stays missing for any province_100 value other than 1, 2, 3*/
qui gen province_id=.
qui replace province_id=5 if province_100==1 /*Eastern*/
qui replace province_id=4 if province_100==2 /*Northern*/
qui replace province_id=3 if province_100==3 /*Western*/
lab var province_id "Province ID, Census code"

/*School type, treatment status and strata.
  Public/private status is not asked in the questionnaire; it was copied over
  from other data. The type dummies are 0 (not missing) when publ_priv_108 is
  missing.*/
quietly generate public=(publ_priv_108==1)
label variable public "BTQ108: public school"
quietly generate private=(publ_priv_108==2)
label variable private "BTQ108: private school"
quietly generate private_pubaided=(publ_priv_108==3)
label variable private_pubaided "BTQ108: part public, part private school"
quietly generate nonpublic=(publ_priv_108==3|publ_priv_108==2)
label variable nonpublic "BTQ108: non-public school"

/*treatment dummy: 1 if group_111 is 1, missing if group_111 is missing, 0 otherwise*/
quietly generate treatment=(group_111==1) if group_111!=.
label define treatment 0 "control" 1 "treatment"
label values treatment treatment 

/*one stratum per district x public combination*/
sort district_101 public
quietly egen strata=group(district_101 public)
label variable strata "stratification ID (district and public/non-public)"

/*Gender, grade levels taught, and participation in the baseline survey*/
quietly generate female_el=gender_200 /*source is labeled no/yes; presumably "female", as in baseline*/
label variable female_el "ETQ111: female"

/*one variable per secondary grade S1-S6*/
forvalues grade=1/6 {
	quietly generate entrepreneurship_S`grade'_el=class_level_112_S`grade'_e
	label variable entrepreneurship_S`grade'_el "ETQ112: teach S`grade' entrepreneurship"
}

quietly generate inbaseline_el=baseline_112_e
label variable inbaseline_el "ETQ112: participated in baseline survey"

************************************************************************
*											   						   *
* 	       SECTION 2: Teacher experiences & training	  			   *
* 											   						   *
************************************************************************
/*demographics & experience*/
qui gen age_el=age_201
qui gen qualified_el=(qual_203==1) /*0 when qual_203 is missing*/
/*NOTE(review): the source variable is named year_start_202_e while the label
  below says "years taught" -- confirm it holds years of experience rather
  than a calendar start year*/
qui gen exper_entrepreneurship_el=year_start_202_e
qui gen numstudents_tot_el=student_number_203_e
qui gen numstudents_m_el=malestud_212
qui gen numstudents_f_el=femalestud_212
lab var age_el "ETQ201: age"
lab var qualified_el "ETQ203: qualified teacher"
/*use the full variable name: the unsuffixed name only resolved through
  variable-name abbreviation and fails under -set varabbrev off-*/
lab var exper_entrepreneurship_el "ETQ205: years taught entrepreneurship"
lab var numstudents_tot_el "ETQ206/209: number of S4/S6 entrepreneurship students taught"
lab var numstudents_m_el "ETQ206/209: number of male S4/S6 entrepreneurship students taught"
lab var numstudents_f_el "ETQ206/209: number of female S4/S6 entrepreneurship students taught"

/*CBC training history, overall.
  Each provider dummy is 1 only if the teacher reports having been trained
  (traincurr_220==1) and names that provider; otherwise it is 0.*/
local gotcbc "traincurr_220==1"
quietly generate trained_el=(`gotcbc')
label variable trained_el "ETQ212: received training on CBC (competence-based curriculum)"
quietly generate trained_reb_el=(`gotcbc' & fromwhom_210_1_reb==1)
label variable trained_reb_el "ETQ213: received CBC training from REB"
quietly generate trained_educate_el=(`gotcbc' & fromwhom_210_2_educate==1)
label variable trained_educate_el "ETQ213: received CBC training from Educate!"
quietly generate trained_rebandeducate_el=(`gotcbc' & fromwhom_210_3_reb_educate==1)
label variable trained_rebandeducate_el "ETQ213: received CBC training from REB & Educate!"
quietly generate trained_e_or_rebande_el=(`gotcbc' & (fromwhom_210_2_educate==1|fromwhom_210_3_reb_educate==1))
label variable trained_e_or_rebande_el "ETQ213: received CBC training from Educate! or REB & Educate!"

/*CBC training history, 2016.
  Counts coded -99 or missing are set to 0; a provider dummy is 1 only when
  at least one 2016 training is reported and that provider is named.*/
quietly generate trained_num_2016_el=numb_training_211_e
quietly generate trained_days_2016_el=days_traning_212_e
foreach v of varlist trained_num_2016_el trained_days_2016_el {
	quietly replace `v'=0 if `v'==.|`v'==-99
}
label variable trained_num_2016_el "ETQ214: number of CBC trainings attended, 2016" 
label variable trained_days_2016_el "ETQ215: days of CBC trainings attended, 2016" 

local any2016 "trained_num_2016_el>0"
quietly generate trained_reb_2016_el=(`any2016' & fromwhom_213_1_reb==1)
label variable trained_reb_2016_el "ETQ216: received CBC training from REB, 2016"
quietly generate trained_educate_2016_el=(`any2016' & fromwhom_213_2_educate==1)
label variable trained_educate_2016_el "ETQ216: received CBC training from Educate!, 2016"
quietly generate trained_rebandeducate_2016_el=(`any2016' & fromwhom_213_3_reb_educate==1)
label variable trained_rebandeducate_2016_el "ETQ216: received CBC training from REB & Educate!, 2016"
quietly generate trained_e_or_rebande_2016_el=(`any2016' & (fromwhom_213_2_educate==1|fromwhom_213_3_reb_educate==1))
label variable trained_e_or_rebande_2016_el "ETQ216: received CBC training from Educate! or REB & Educate!, 2016"
 
/*CBC training history, 2017.
  Counts coded -99, 99 or missing are set to 0; a provider dummy is 1 only
  when at least one 2017 training is reported and that provider is named.
  NOTE(review): the provider variables used here are the same fromwhom_213_*
  variables used for the 2016 dummies, although the labels cite ETQ219 --
  confirm whether a separate 2017 provider item exists in the raw data.*/
quietly generate trained_num_2017_el=training_2017 /*some may have interpreted as "days," not trainings*/
quietly replace trained_num_2017_el=0 if training_2017==.|training_2017==99|training_2017==-99
label variable trained_num_2017_el "ETQ217: number of CBC trainings attended, 2017" 

local any2017 "trained_num_2017_el>0"
quietly generate trained_reb_2017_el=(`any2017' & fromwhom_213_1_reb==1)
label variable trained_reb_2017_el "ETQ219: received CBC training from REB, 2017"
quietly generate trained_educate_2017_el=(`any2017' & fromwhom_213_2_educate==1)
label variable trained_educate_2017_el "ETQ219: received CBC training from Educate!, 2017"
quietly generate trained_rebandeducate_2017_el=(`any2017' & fromwhom_213_3_reb_educate==1)
label variable trained_rebandeducate_2017_el "ETQ219: received CBC training from REB & Educate!, 2017"
quietly generate trained_e_or_rebande_2017_el=(`any2017' & (fromwhom_213_2_educate==1|fromwhom_213_3_reb_educate==1))
label variable trained_e_or_rebande_2017_el "ETQ219: received CBC training from Educate! or REB & Educate!, 2017"

************************************************************************
*											   						   *
* 	       SECTION 3: Curricular Knowledge & perceptions			   *
* 											   						   *
************************************************************************

/*other variables not modified: rec_new_curr_401_e 
								reb_textbook_402_e 
								much_covered_403_e
								unit_class_e
								hours_entr_404_e (not much variation)
								nbrobstrain_222_e 
								nbrothobstrain_223_e
								balenc_sheet_415*	*/

/*peer observation and lesson documentation dummies (0 when the source is missing)*/
qui gen train_obs_el=(obstrain_222_e==1|othobstrain_223_e==1)
qui gen entre_lessonplan_shown_el=(writtentr_224==3)
qui gen entre_lessonplan_notshown_el=(writtentr_224==2)
qui gen entre_lessonplan_none_el=(writtentr_224==1)
qui gen lessonnotes_shown_el=(writtlesso_225==3)
qui gen lessonnotes_notshown_el=(writtlesso_225==2)
qui gen lessonnotes_none_el=(writtlesso_225==1)

/*knowledge items: count how many of the listed response variables equal 1,
  separately for the options treated as true and as false, then divide by
  the number of options in each list to get a share*/
qui egen mrktskl_true_el=anycount(mrktskl_400_DevProBrand mrktskl_400_Advert mrktskl_400_VerbalProm), v(1)
qui egen mrktskl_false_el=anycount(mrktskl_400_KeepingAccount mrktskl_400_BusinessReg), v(1)
qui gen mrktskl_true_pct_el=mrktskl_true_el/3
qui gen mrktskl_false_pct_el=mrktskl_false_el/2
qui egen buspln_true_el=anycount(busplnskl_401_BusiName busplnskl_401_MarketgPlan), v(1)
qui egen buspln_false_el=anycount(busplnskl_401_ReceiFromSales busplnskl_401_PandLStatemt busplnskl_401_Audit), v(1)
qui gen buspln_true_pct_el=buspln_true_el/2
qui gen buspln_false_pct_el=buspln_false_el/3
qui egen balancesht_true_el=anycount(balenc_sheet_415_e_assets balenc_sheet_415_e_liablties balenc_sheet_415_e_capital), v(1)
qui egen balancesht_false_el=anycount(balenc_sheet_415_e_profits balenc_sheet_415_e_marktplan balenc_sheet_415_e_totsales), v(1)
qui gen balancesht_true_pct_el=balancesht_true_el/3
qui gen balancesht_false_pct_el=balancesht_false_el/3

lab var train_obs_el "ETQ405/406: observed or observed by another teacher in last term"
lab var entre_lessonplan_shown_el "ETQ407: has written entrepreneurship lesson plan (shown)"
lab var entre_lessonplan_notshown_el "ETQ407: has written entrepreneurship lesson plan (not shown)"
lab var entre_lessonplan_none_el "ETQ407: does not have written entrepreneurship lesson plan"
lab var lessonnotes_shown_el "ETQ408: has lesson notes (shown)"
lab var lessonnotes_notshown_el "ETQ408: has lesson notes (not shown)"
lab var lessonnotes_none_el "ETQ408: does not have lesson notes"
/*the "(of k)" in each label is the number of variables in the matching anycount() list above*/
lab var mrktskl_true_el "ETQ413: number of true marketing skills identified (of 3)" 	
lab var mrktskl_false_el "ETQ413: number of false marketing skills identified (of 2)" 
lab var mrktskl_true_pct_el "ETQ413: % of true marketing skills identified (of 3)" 	
lab var mrktskl_false_pct_el "ETQ413: % of false marketing skills identified (of 2)" 
lab var buspln_true_el "ETQ414: number of true business plan elements identified (of 2)" 	
lab var buspln_false_el "ETQ414: number of false business plan elements identified (of 3)" 
lab var buspln_true_pct_el "ETQ414: % of true business plan elements identified (of 2)" 	
lab var buspln_false_pct_el "ETQ414: % of false business plan elements identified (of 3)"
lab var balancesht_true_el "ETQ416: number of true balance sheet elements identified (of 3)" 	
lab var balancesht_false_el "ETQ416: number of false balance sheet elements identified (of 3)" 
lab var balancesht_true_pct_el "ETQ416: % of true balance sheet elements identified (of 3)" 	
lab var balancesht_false_pct_el "ETQ416: % of false balance sheet elements identified (of 3)"

/*summary indices: row means of the component variables (rowmean ignores missing components)*/
qui egen lessonplan_index_el=rowmean(entre_lessonplan_shown_el lessonnotes_shown_el)
qui egen eknowledge_index_el=rowmean(mrktskl_true_pct_el buspln_true_pct_el balancesht_true_pct_el)
lab var lessonplan_index_el "ETQ407-408: mean of dummies for showing lesson plan & lesson notes"
lab var eknowledge_index_el "ETQ413/414/416: mean of entrepreneurship knowledge questions"

/*Skills Labs: lesson-plan inclusion, knowledge of the definition (open-ended
  and multiple-choice versions), and number taught*/
quietly generate lessonnotes_skillslab_el=(skillslab_408==1)
label variable lessonnotes_skillslab_el "ETQ409: lesson plan included Skills Lab"

/*in both definition items, response code 2 is the one scored as correct*/
quietly generate skillslab_definition_el=(def_skilllab_315==2)
label variable skillslab_definition_el "ETQ411: knows definition of Skills Lab (open-ended)"
quietly generate skillslab_definition_mc_el=(skills_lab_def_411_e==2)
label variable skillslab_definition_mc_el "ETQ412: knows definition of Skills Lab (multiple choice)"

quietly generate skillslabs_taught_el=skillslab_taught_412_e
label variable skillslabs_taught_el "ETQ410: # of Skills Labs taught this term"

/*perceptions of curriculum*/
/*copy the seven agreement-scale items (1=strongly agree ... 5=strongly disagree)*/
qui gen enjoytch_el=enjoy_teac_416_e
qui gen improvetch_el=empr_my_skills_418_e 
qui gen improvelrn_el=empr_learner_skills_419_e
qui gen enjoytchmr_el=new_vs_old_421_e
qui gen improvetchmr_el=skills_uneder_new_422_e  
qui gen improvelrnmr_el=lear_better_new_423_e
qui gen prepexambtr_el=lern_pass_better_424_e
/*for each item: one dummy per response category plus an "agree or strongly
  agree" dummy (all 0 when the item is missing), then labels and notes*/
foreach x in enjoytch improvetch improvelrn enjoytchmr improvetchmr improvelrnmr prepexambtr {
	if ("`x'"=="enjoytch") local lbl "ETQ418: enjoys teaching"
	else if ("`x'"=="improvetch") local lbl "ETQ419: teaching improved under"
	else if ("`x'"=="improvelrn") local lbl "ETQ420: learner skills improved under"
	else if ("`x'"=="enjoytchmr") local lbl "ETQ422: enjoys teaching more"
	else if ("`x'"=="improvetchmr") local lbl "ETQ423: teaching improved more"
	else if ("`x'"=="improvelrnmr") local lbl "ETQ424: learner skills improved more"
	else if ("`x'"=="prepexambtr") local lbl "ETQ425: learners prepare for exams better under"
	/*three stems already end in "under"; do not append a second one in the scale label*/
	if inlist("`x'","improvetch","improvelrn","prepexambtr") local scalelbl "`lbl' CBC"
	else local scalelbl "`lbl' under CBC"
	qui gen `x'_strngagree_el=(`x'_el==1)
	qui gen `x'_agree_el=(`x'_el==2)
	qui gen `x'_neutral_el=(`x'_el==3)
	qui gen `x'_disagree_el=(`x'_el==4)
	qui gen `x'_strngdisagree_el=(`x'_el==5)
	qui gen `x'_atleastagree_el=(`x'_el==1|`x'_el==2)
	lab var `x'_el "`scalelbl' (1=strongly agree, 5=strongly disagree)"
	lab var `x'_strngagree_el "`lbl' CBC: strongly agree"
	lab var `x'_agree_el "`lbl' CBC: agree"
	lab var `x'_neutral_el "`lbl' CBC: neither agree nor disagree"
	lab var `x'_disagree_el "`lbl' CBC: disagree"
	lab var `x'_strngdisagree_el "`lbl' CBC: strongly disagree"
	lab var `x'_atleastagree_el "`lbl' CBC: agree or strongly agree"
	note `x'_el: "CBC is competency-based curriculum"
	/*the four comparative items carry an extra note about who they apply to*/
	if ("`x'"=="enjoytchmr"|"`x'"=="improvetchmr"|"`x'"=="improvelrnmr"|"`x'"=="prepexambtr") {
		note `x'_el: "conditional on teaching in pre-2016 knowledge-based curriculum"
	}
}	

/***************************************************
					Pedagogy
					
* classify these methods as "active learning":
	--question & answer
	--small group work
	--games
	--class discussion
	--activities outside classroom
	--practice exercises
	--experiment
	--student portfolio
***************************************************/
/*build the list of "most comfortable" method variables classified as active learning*/
local active_mostcomf
foreach method in QandA GroupWork Games Discussion ActOutClass PracticeEx Experiment Portfolio {
	local active_mostcomf "`active_mostcomf' meth_300_mostcomf_`method'"
}
/*row total across the eight variables (rowtotal treats missing as 0)*/
quietly egen pedagogy_active_mostcomf_el=rowtotal(`active_mostcomf')
label variable pedagogy_active_mostcomf_el "ETQ426: # active learning strategies among most comfortable methods (of 8)"
notes pedagogy_active_mostcomf_el: "active includes Q&A, small group work, games, class discussion, activities outside classroom, practice exercises, experiment, student portfolio"
notes pedagogy_active_mostcomf_el: "was intended to be a ranking, but no ranks applied"


/*************************************************************************
 alternative classification: 
 classify these methods as "active learning":
	--small group work
	--games
	--research
	--activities outside classroom
	--practice exercises
	--experiment
	--student portfolio
local active2_mostcomf "meth_300_mostcomf_GroupWork meth_300_mostcomf_Games meth_300_mostcomf_Research 
	meth_300_mostcomf_ActOutClass meth_300_mostcomf_PracticeEx meth_300_mostcomf_Experiment 
	meth_300_mostcomf_Portfolio"
qui egen pedagogy_active2_mostcomf_el=rowtotal(`active2_mostcomf')
notes pedagogy_active2_mostcomf_el: "active (E! definition) includes small group work, games, research, activities outside classroom, practice exercises, experiment, student portfolio"
notes pedagogy_active2_mostcomf_el: "was intended to be a ranking, but no ranks applied"
lab var pedagogy_active2_mostcomf_el "ETQ426: # active learning strategies among most comfortable methods (of 7, E! definition)" */


************************************************************************
*											   						   *
* 	     SECTION 5: Teacher income and job satisfaction			   	   *
* 											   						   *
************************************************************************

/*Work outside teaching: indicator and weekly hours (missing hours become 0)*/
quietly generate moonlight_el=(othoccup_600==1)
label variable moonlight_el "ETQ500: works in occupation outside teaching"
quietly generate moonlight_hrswk_el=cond(hrsothoccup_601==.,0,hrsothoccup_601)
label variable moonlight_hrswk_el "ETQ501: hours/week in occupation outside teaching"

/*Job satisfaction: items point1_606-point9_606, rated 1=very dissatisfied to
  4=very satisfied. The i-th word of the list below names the i-th item. For
  each item keep the raw rating and build one dummy per category plus two
  pooled dummies (all dummies are 0 when the rating is missing).*/
local aspects "salary workload currentpost jobsecurity appreciation status wrkenvrnmnt support joboverall"
forvalues i=1/9 {
	local x : word `i' of `aspects'
	local rating "point`i'_606"
	quietly generate `x'_el=`rating'
	quietly generate `x'_vdissatisfied_el=(`rating'==1)
	quietly generate `x'_dissatisfied_el=(`rating'==2)
	quietly generate `x'_satisfied_el=(`rating'==3)
	quietly generate `x'_vsatisfied_el=(`rating'==4)
	quietly generate `x'_unsatisfied_el=(`rating'==1|`rating'==2)
	quietly generate `x'_atlstsatisfied_el=(`rating'==3|`rating'==4)
	label variable `x'_el "ETQ503: rating of `x' (1=very dissatisfied, 4=very satisfied)"
	label variable `x'_vdissatisfied_el "ETQ503: very dissatisfied with `x'"
	label variable `x'_dissatisfied_el "ETQ503: dissatisfied with `x'"
	label variable `x'_satisfied_el "ETQ503: satisfied with `x'"
	label variable `x'_vsatisfied_el "ETQ503: very satisfied with `x'"
	label variable `x'_unsatisfied_el "ETQ503: very dissatisfied or dissatisfied with `x'"
	label variable `x'_atlstsatisfied_el "ETQ503: very satisfied or satisfied with `x'"
}

* flag every observation as part of the endline sample, then save the cleaned file
quietly generate insample_el=1
label variable insample_el "in endline sample"

label data "Teacher endline survey (2018), modified from cleaned data"
quietly compress
quietly save "$cleandata/teacher_endline_clean_jde.dta", replace	

* display the clock time at launch and at completion
local end=`"$S_TIME"' 
display "`start'"
display "`end'"
